Copernicus Data Extraction

Published

2025-05-18 00:24 UTC

Code
librarian::shelf(
  dplyr, DT, fs, glue, here, logger, jsonlite, listviewer, lubridate, mapview, 
  purrr, RColorBrewer, readr, reticulate, sf, stringr, terra, tibble, tidyr, 
  units, 
  quiet = T)
options(readr.show_col_types = F)
mapviewOptions(
  basemaps       = "Esri.OceanBasemap",
  vector.palette = colorRampPalette(brewer.pal(n=5, name="Spectral")))

1 Polygons

Code
sanctuaries_rds <- here("../climate-dashboard/data/sanctuaries.rds")
buf_pct         <- 0.20 # 20% of area

ply_sanctuaries <- readRDS(sanctuaries_rds)

# buffer polygon and get bounding boxes
ply_sanctuaries <- ply_sanctuaries |>
  mutate(
    area_km2  = st_area(geom) |> 
      set_units("km^2") |> 
      as.numeric() |> 
      round(2),
    buf_km    = (sqrt(area_km2) * buf_pct) |> 
      round(2),
    bbox_geom = map2(geom, area_km2, \(g,a) {
      st_sfc(g, crs = 4326) |> 
        st_buffer(buf_km * 1000) |> 
        st_bbox() |>
        round(2) |> 
        st_as_sfc() }),
    bbox_chr  = map_chr(bbox_geom, \(x) {  # xmin, ymin, xmax and ymax
      st_bbox(x) |> 
        as.numeric() |> 
        paste(collapse = ",") } ) ) |> 
  unnest(bbox_geom)
bb_sanctuaries <- st_set_geometry(ply_sanctuaries, "bbox_geom") |> 
  select(-geom)
ply_sanctuaries <- select(ply_sanctuaries, -bbox_geom)

# show map
mapView(ply_sanctuaries, zcol = "nms") +
  mapView(bb_sanctuaries, zcol = "nms", legend = F)
Code
ply_sanctuaries |>
  st_drop_geometry() |>
  datatable()

2 Dataset

Code
# import copernicusmarine as cmt ----
# do once: create virtual enviroment and install copernicusmarine Python module
# virtualenv_create(envname = "CopernicusMarine")
# virtualenv_install(envname = "CopernicusMarine", packages = c("copernicusmarine"))
# TODO: check for CopernicusMarine env with copernicusmarine Python module

# use CopernicusMarine env with copernicusmarine Python module
use_virtualenv(virtualenv = "CopernicusMarine", required = TRUE)
cmt <- import("copernicusmarine")

# login to cmt ----

# register for username and password at https://data.marine.copernicus.eu/register
user     <- "bbest1"
pass_txt <- ifelse(
  Sys.info()[["sysname"]] == "Linux",
  "/share/private/data.marine.copernicus.eu_bbest1-password.txt",      # server
  "~/My Drive/private/data.marine.copernicus.eu_bbest1-password.txt")  # laptop

pass <- readLines(pass_txt)
logged_in <- cmt$login(user, pass, force_overwrite = T)  # py_help(cmt$login)
stopifnot(logged_in)

# DEBUG
m_yml <- here("meta/copernicus_phy.yml")
params <- yaml::read_yaml(m_yml) 
params$proc_prod <- path_ext_remove(basename(m_yml))
proc <- strsplit(params$proc_prod, "_")[[1]][1]  # erddap or copernicus
prod <- strsplit(params$proc_prod, "_")[[1]][2]  # copernicus product

dir_out <- here(glue("data/{params$proc_prod}"))
dir_create(dir_out)

# show params ----
jsonedit(params)
Code
# get times ----
get_dataset_description <- function(dataset_id){
  cmt$describe(
    dataset_id           = dataset_id,
    disable_progress_bar = T)$json() |> 
    fromJSON()
}

get_dataset_times <- function(dataset_description){
  services <- pluck(
    dataset_description, "products", "datasets", 1, "versions", 1, "parts", 1, "services", 1)
  service_names <- pluck(
    services, "service_name")
  i_timeseries <- which(service_names == "arco-time-series")
  variables <- pluck(
    services, "variables", i_timeseries, "short_name")
  coordinates <- pluck(
    services, "variables", i_timeseries, "coordinates")
  coordinate_ids <- pluck(
    coordinates, 1, "coordinate_id")   # assuming 1st variable
  i_time <- which(coordinate_ids == "time")
  time_min <- pluck(
    coordinates, 1, "minimum_value", i_time) |> 
    # convert from [milliseconds since 1970-01-01 00:00:00Z (no leap seconds)]
    as.numeric() %>% {./1000} |> 
    as.POSIXct(origin = "1970-01-01", tz = "UTC")
  time_max <- pluck(
    coordinates, 1, "maximum_value", i_time) |> 
    # convert from [milliseconds since 1970-01-01 00:00:00Z (no leap seconds)]
    as.numeric() %>% {./1000} |> 
    as.POSIXct(origin = "1970-01-01", tz = "UTC")
  time_step_secs <- pluck(
    coordinates, 1, "step", i_time) |> 
    as.numeric() %>% 
    {./1000} 
  seq.POSIXt(
    from = time_min,
    to   = time_max,
    by   = time_step_secs)
}

d_ds <- tibble(
  dataset_id = params |> 
    pluck("datasets") |> 
    map_chr("id")) |> 
  mutate(
    description = map(dataset_id, get_dataset_description),
    times       = map(description, get_dataset_times),
    time_min    = map_vec(times, min),
    time_max    = map_vec(times, max),
    n_times     = map_int(times, length))

d_ds |> 
  select(-description, -times) |> 
  datatable(
    caption  = "Copernicus dataset times.")

3 Polygon dataset years

Code
d_ds_yr <- d_ds |>
  mutate(
    d_yrs = map(times, \(ts) {
      yrs <- unique(year(ts))
      tibble(
        yr = yrs) |> 
        mutate(
          time_min = map_vec(yr, \(yr) min(ts[year(ts) == yr])),
          time_max = map_vec(yr, \(yr) max(ts[year(ts) == yr])),
          n_times  = map_int(yr, \(yr) length(ts[year(ts) == yr])) ) }) ) |> 
  select(-description, -times, -time_min, -time_max, -n_times) |>
  unnest(d_yrs) |> 
  select(ds_id = dataset_id, yr, time_min, time_max, n_times)

d_nms_ds_yr <- ply_sanctuaries |> 
  st_drop_geometry() |> 
  arrange(nms) |> 
  select(nms) |> 
  cross_join(
    d_ds_yr) |> 
  mutate(
    csv = glue("{dir_out}/{nms}/{yr}.csv"),  
    tif = glue("{dir_out}/{nms}/{yr}.tif") ) |> 
  # TODO: check existing csv
  rowid_to_column("i") |>
  relocate(i)

d_nms_ds_yr |> 
  group_by(nms, ds_id) %>%
  {if (nrow(.) > 0)
    summarize(
      .,
      n_times  = sum(n_times),
      time_min = min(time_min, na.rm = T),
      time_max = max(time_max, na.rm = T),
      .groups = "drop") else .} |>
  datatable(
    caption  = "Sanctuaries missing available ERDDAP times.",
    rownames = F,
    options  = list(
      pageLength = 5,
      lengthMenu = c(5, 50, nrow(d_nms_ds_yr))))

4 Extract dataset per polygon year

Using the Copernicus Marine Toolbox in R. See:

Code
fxn <- function(i, ds_id, nms, time_min, time_max, ...){
  # i = 1; nms = "CBNMS"; ds_id = "cmems_mod_glo_phy_my_0.083deg_P1D-m"
  # time_min = as.POSIXct("1993-01-01", tz = "UTC"); time_max = as.POSIXct("1993-12-01", tz = "UTC")
  # time_min = as.POSIXct("1993-12-02", tz = "UTC"); time_max = as.POSIXct("1993-12-31", tz = "UTC")
  # d_nms_ds_yr |> slice(1) |> attach()
  
  err <- tryCatch({
    
    log_info("{sprintf('%03d', i)}: {nms}, {ds_id}, {year(time_min)} ({time_min} to {time_max})")
    
    yr    <- year(time_min)
    r_nc  <- glue("{dir_out}/{nms}/{yr}.nc")
    r_tif <- path_ext_set(r_nc, "tif")
    p_csv <- path_ext_set(r_nc, "csv")
    bb    <- bb_sanctuaries |>
      filter(nms == !!nms) |> 
      st_bbox()
    p     <- ply_sanctuaries |>
      filter(nms == !!nms)
    vars  <- map_chr(params$variables, "id")
    
    dir_create(dirname(r_nc))
    
    res <- cmt$subset(                    # py_help( cmt$subset)
      dataset_id        = ds_id,
      service           = "timeseries",
      variables         = vars,
      minimum_longitude = bb$xmin,
      maximum_longitude = bb$xmax,
      minimum_latitude  = bb$ymin,
      maximum_latitude  = bb$ymax,
      start_datetime    = time_min,
      end_datetime      = time_max,
      minimum_depth     = params$depth[1],
      maximum_depth     = params$depth[2],
      output_directory  = dirname(r_nc),
      output_filename   = basename(r_nc),
      overwrite         = T)

    # TODO: merge with existing
    r <- rast(r_nc)
    
    # set dataset_id into layer name
    names(r) <- glue("{ds_id} {str_replace_all(names(r), '_',' ')}")
    writeRaster(r, r_tif, overwrite = T)
    file_delete(r_nc)
      
    # TODO: fxn to read:
    # r <- rast(r_tif)
    # names(r)s
    # time(r)
    # TODO: handle ?terra::depth()
    
    # zonal tif to csv ---- 
    r <- rast(r_tif)
    extract_tbl <- function(r, p, stat = "mean"){
      terra::extract(r, p, fun = stat, na.rm=T) |> 
        pivot_longer(!ID, names_to = "lyr") |> 
        separate_wider_regex(
          lyr, c(
            dataset = "^\\S+", " ", 
            var     = "\\S+", 
            depth   = "(?:\\sdepth=[.0-9]+)?",
            itime   = "\\s[0-9]+$")) |> 
        mutate(
          depth = str_replace(depth, "depth=", "") |> as.numeric(),
          itime = as.integer(itime),
          stat = !!stat,
          time = time(r)) |> 
        select(dataset, var, depth, time, stat, value)
    }
    bind_rows(
      extract_tbl(r, p, "mean"),
      extract_tbl(r, p, "sd")) |> 
      write_csv(p_csv, na = "")

    return(NA)
  }, error = function(e) {
    
    log_error(conditionMessage(e))
    return(conditionMessage(e))
  })
  
  err
} 

res <- d_nms_ds_yr |> 
  # slice(1:3) |>  # DEBUG
  slice(33:nrow(d_nms_ds_yr)) |>  # DEBUG
  mutate(
    error = pmap_chr(list(i, ds_id, nms, time_min, time_max), fxn))

4.1 Successes

Code
res |> 
  mutate(
    success = is.na(error)) |> 
  group_by(success) |> 
  summarize(
    n = n()) |> 
  datatable()

4.2 Errors (if any)

Code
res |> 
  filter(!is.na(error)) |> 
  group_by(nms) |> 
  summarize(
    n_years = n(),
    errors  = unique(error) |> paste(collapse = "\n\n----\n\n")) |> 
  datatable()
Code
devtools::session_info()
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.4.2 (2024-10-31)
 os       Ubuntu 24.04.1 LTS
 system   x86_64, linux-gnu
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       Etc/UTC
 date     2025-05-18
 pandoc   3.5 @ /usr/bin/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────
 package           * version  date (UTC) lib source
 base64enc           0.1-3    2015-07-28 [1] RSPM (R 4.4.0)
 brew                1.0-10   2023-12-16 [1] RSPM (R 4.4.0)
 bslib               0.9.0    2025-01-30 [1] RSPM (R 4.4.0)
 cachem              1.1.0    2024-05-16 [1] RSPM (R 4.4.0)
 class               7.3-22   2023-05-03 [2] CRAN (R 4.4.2)
 classInt            0.4-11   2025-01-08 [1] RSPM (R 4.4.0)
 cli                 3.6.5    2025-04-23 [1] RSPM (R 4.4.0)
 codetools           0.2-20   2024-03-31 [2] CRAN (R 4.4.2)
 colorspace          2.1-1    2024-07-26 [1] RSPM (R 4.4.0)
 crosstalk           1.2.1    2023-11-23 [1] RSPM (R 4.4.0)
 DBI                 1.2.3    2024-06-02 [1] RSPM (R 4.4.0)
 devtools            2.4.5    2022-10-11 [1] RSPM (R 4.4.0)
 dichromat           2.0-0.1  2022-05-02 [1] RSPM (R 4.4.0)
 digest              0.6.37   2024-08-19 [1] RSPM (R 4.4.0)
 dplyr             * 1.1.4    2023-11-17 [1] RSPM (R 4.4.0)
 DT                * 0.33     2024-04-04 [1] RSPM (R 4.4.0)
 e1071               1.7-16   2024-09-16 [1] RSPM (R 4.4.0)
 ellipsis            0.3.2    2021-04-29 [1] RSPM (R 4.4.0)
 evaluate            1.0.3    2025-01-10 [1] RSPM (R 4.4.0)
 farver              2.1.2    2024-05-13 [1] RSPM (R 4.4.0)
 fastmap             1.2.0    2024-05-15 [1] RSPM (R 4.4.0)
 fs                * 1.6.6    2025-04-12 [1] RSPM (R 4.4.0)
 generics            0.1.4    2025-05-09 [1] RSPM (R 4.4.0)
 glue              * 1.8.0    2024-09-30 [1] RSPM (R 4.4.0)
 here              * 1.0.1    2020-12-13 [1] RSPM (R 4.4.0)
 hms                 1.1.3    2023-03-21 [1] RSPM (R 4.4.0)
 htmltools           0.5.8.1  2024-04-04 [1] RSPM (R 4.4.0)
 htmlwidgets         1.6.4    2023-12-06 [1] RSPM (R 4.4.0)
 httpuv              1.6.16   2025-04-16 [1] RSPM (R 4.4.0)
 jquerylib           0.1.4    2021-04-26 [1] RSPM (R 4.4.0)
 jsonlite          * 2.0.0    2025-03-27 [1] RSPM (R 4.4.0)
 KernSmooth          2.23-24  2024-05-17 [2] CRAN (R 4.4.2)
 knitr               1.50     2025-03-16 [1] RSPM (R 4.4.0)
 later               1.4.2    2025-04-08 [1] RSPM (R 4.4.0)
 lattice             0.22-6   2024-03-20 [2] CRAN (R 4.4.2)
 leafem              0.2.4    2025-05-01 [1] RSPM (R 4.4.0)
 leaflet             2.2.2    2024-03-26 [1] RSPM (R 4.4.0)
 leaflet.providers   2.0.0    2023-10-17 [1] RSPM (R 4.4.0)
 leafpop             0.1.0    2021-05-22 [1] RSPM (R 4.4.0)
 librarian           1.8.1    2021-07-12 [1] RSPM (R 4.4.0)
 lifecycle           1.0.4    2023-11-07 [1] RSPM (R 4.4.0)
 listviewer        * 4.0.0    2023-09-30 [1] RSPM (R 4.4.0)
 logger            * 0.4.0    2024-10-22 [1] RSPM (R 4.4.0)
 lubridate         * 1.9.4    2024-12-08 [1] RSPM (R 4.4.0)
 magrittr            2.0.3    2022-03-30 [1] RSPM (R 4.4.0)
 mapview           * 2.11.2   2023-10-13 [1] RSPM (R 4.4.0)
 Matrix              1.7-1    2024-10-18 [2] CRAN (R 4.4.2)
 memoise             2.0.1    2021-11-26 [1] RSPM (R 4.4.0)
 mime                0.13     2025-03-17 [1] RSPM (R 4.4.0)
 miniUI              0.1.1.1  2018-05-18 [1] RSPM (R 4.4.0)
 pillar              1.10.2   2025-04-05 [1] RSPM (R 4.4.0)
 pkgbuild            1.4.5    2024-10-28 [1] RSPM (R 4.4.0)
 pkgconfig           2.0.3    2019-09-22 [1] RSPM (R 4.4.0)
 pkgload             1.4.0    2024-06-28 [1] RSPM (R 4.4.0)
 png                 0.1-8    2022-11-29 [1] RSPM (R 4.4.0)
 profvis             0.4.0    2024-09-20 [1] RSPM (R 4.4.0)
 promises            1.3.2    2024-11-28 [1] RSPM (R 4.4.0)
 proxy               0.4-27   2022-06-09 [1] RSPM (R 4.4.0)
 purrr             * 1.0.4    2025-02-05 [1] RSPM (R 4.4.0)
 R6                  2.6.1    2025-02-15 [1] RSPM (R 4.4.0)
 raster              3.6-32   2025-03-28 [1] RSPM (R 4.4.0)
 RColorBrewer      * 1.1-3    2022-04-03 [1] RSPM (R 4.4.0)
 Rcpp                1.0.14   2025-01-12 [1] RSPM (R 4.4.0)
 readr             * 2.1.5    2024-01-10 [1] RSPM (R 4.4.0)
 remotes             2.5.0    2024-03-17 [1] RSPM (R 4.4.0)
 reticulate        * 1.41.0.1 2025-03-09 [1] RSPM (R 4.4.0)
 rlang               1.1.6    2025-04-11 [1] RSPM (R 4.4.0)
 rmarkdown           2.29     2024-11-04 [1] RSPM (R 4.4.0)
 rprojroot           2.0.4    2023-11-05 [1] RSPM (R 4.4.0)
 s2                  1.1.8    2025-05-12 [1] RSPM (R 4.4.2)
 sass                0.4.10   2025-04-11 [1] RSPM (R 4.4.0)
 satellite           1.0.5    2024-02-10 [1] RSPM (R 4.4.0)
 scales              1.4.0    2025-04-24 [1] RSPM (R 4.4.0)
 sessioninfo         1.2.2    2021-12-06 [1] RSPM (R 4.4.0)
 sf                * 1.0-21   2025-05-15 [1] RSPM (R 4.4.2)
 shiny               1.9.1    2024-08-01 [1] RSPM (R 4.4.0)
 sp                  2.2-0    2025-02-01 [1] RSPM (R 4.4.0)
 stringi             1.8.7    2025-03-27 [1] RSPM (R 4.4.0)
 stringr           * 1.5.1    2023-11-14 [1] RSPM (R 4.4.0)
 svglite             2.2.1    2025-05-12 [1] RSPM (R 4.4.0)
 systemfonts         1.2.3    2025-04-30 [1] RSPM (R 4.4.0)
 terra             * 1.8-50   2025-05-09 [1] RSPM (R 4.4.0)
 textshaping         1.0.1    2025-05-01 [1] RSPM (R 4.4.0)
 tibble            * 3.2.1    2023-03-20 [1] RSPM (R 4.4.0)
 tidyr             * 1.3.1    2024-01-24 [1] RSPM (R 4.4.0)
 tidyselect          1.2.1    2024-03-11 [1] RSPM (R 4.4.0)
 timechange          0.3.0    2024-01-18 [1] RSPM (R 4.4.0)
 tzdb                0.5.0    2025-03-15 [1] RSPM (R 4.4.0)
 units             * 0.8-7    2025-03-11 [1] RSPM (R 4.4.0)
 urlchecker          1.0.1    2021-11-30 [1] RSPM (R 4.4.0)
 usethis             3.1.0    2024-11-26 [1] RSPM (R 4.4.0)
 uuid                1.2-1    2024-07-29 [1] RSPM (R 4.4.0)
 vctrs               0.6.5    2023-12-01 [1] RSPM (R 4.4.0)
 withr               3.0.2    2024-10-28 [1] RSPM (R 4.4.0)
 wk                  0.9.4    2024-10-11 [1] RSPM (R 4.4.0)
 xfun                0.52     2025-04-02 [1] RSPM (R 4.4.0)
 xtable              1.8-4    2019-04-21 [1] RSPM (R 4.4.0)
 yaml                2.3.10   2024-07-26 [1] RSPM (R 4.4.0)

 [1] /usr/local/lib/R/site-library
 [2] /usr/local/lib/R/library

─ Python configuration ───────────────────────────────────────────────────────
 python:         /home/admin/.virtualenvs/CopernicusMarine/bin/python
 libpython:      /usr/lib/python3.12/config-3.12-x86_64-linux-gnu/libpython3.12.so
 pythonhome:     /home/admin/.virtualenvs/CopernicusMarine:/home/admin/.virtualenvs/CopernicusMarine
 version:        3.12.3 (main, Nov  6 2024, 18:32:19) [GCC 13.2.0]
 numpy:          /home/admin/.virtualenvs/CopernicusMarine/lib/python3.12/site-packages/numpy
 numpy_version:  2.2.5
 copernicusmarine:/home/admin/.virtualenvs/CopernicusMarine/lib/python3.12/site-packages/copernicusmarine
 
 NOTE: Python version was forced by use_python() function

──────────────────────────────────────────────────────────────────────────────